####################################################################################
# Keeping Vigil: The Emergence of Vigilance Committees in Pre-Civil War America.  #
# Jonathan Obert & Eleonora Mattiacci                                             #
# Last modified: January 31, 2018                                                 #
###################################################################################

## This R file contains the code necessary to replicate the analysis in the Appendix, except Tables 3 and 5 (see separate Stata do files).
## The analysis was carried out using R version 3.4.1 on a 3.5 GHz Intel Core i7 running OS Sierra 10.12.6

rm(list=ls())
#loading packages (function from Huff and Kertzer 2017)
ipak <- function(pkg){  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
                        if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
                        sapply(pkg, require, character.only = TRUE)
}

packages <- c("foreign","survival", "simPH", "extrafont", "RColorBrewer", "MASS") 
ipak(packages)

#####
# Setting working directory + loading data
#####

setwd("") 
dat<-read.dta("data_replication.dta")
dat1<-subset(dat,dat$multiple==0)
dat1$bret<-dat1$bre*log(dat1$stop + 1)
dat$bret<-dat$bre*log(dat$stop+1)


#####
# Code to reproduce Table 1
#####

# We first ran the simplest model. A Ph test indicates the need to add an interaction of Borders with Time
#model0m<-coxph(Surv(start, stop, committee)~  ef_i+ bre +cluster(fips),data=dat1,method="efron");summary(model0m);cox.zph(model0m)

model0mt<-coxph(Surv(start, stop, committee)~  ef_i+ bre + bret +cluster(fips),data=dat1,method="efron");summary(model0mt)
AIC(model0mt);BIC(model0mt);logLik(model0mt)

#####
# Code to reproduce Figure 1
#####

#Simulations
 sims<-1000
 set.seed(031415)
 sim.betas<-mvrnorm(sims,mu= model0mt$coef,Sigma= model0mt$var)

#Plot 
 b1<-sim.betas[,2]
 b2<-sim.betas[,3]

t.sim<-seq(1, 10, by=1)
out<-matrix(0, nrow=length(t.sim), ncol=3)

 for(i in 1:length(t.sim)){
	 simfit<- ((b1+(b2*log(t.sim[i]))))
	 out[i,]<-c(median(simfit), quantile(simfit, c(.025, .975)))
	 }

 #Plotting the Simulation
 plot(t.sim, out[,3], type="n", xlab=list("Time (years)",cex=.9), ylab=list("Combined Coefficient", cex=.9), main="", xlim=c(0,10),ylim=c(-14,14))
 lines(t.sim, out[,1], lty=1)
 lines(t.sim, out[,2], lty=2)
 lines(t.sim, out[,3], lty=2)
 abline(h=0, lwd=2)
 abline(v=8.2, lwd=1, col = "grey")
 
#####
# Code to reproduce Model 1 in  Table 2
#####

# We first ran the simplest model. A Ph test indicates the need to add an interaction of Borders with Time
# model1<-coxph(Surv(start, stop, committee)~  ef_i+ bre+ ef_i_bre +cluster(fips),data=dat,method="efron");summary(model1);cox.zph(model1);cox.zph(model1)

model1t<-coxph(Surv(start, stop, committee)~  ef_i+ bre+ ef_i_bre +bret +cluster(fips),data=dat,method="efron");summary(model1t)
AIC(model1t);BIC(model1t);logLik(model1t)

#####
# Code to reproduce Model 1 in  Table 2
#####
# We first ran the simplest model. A Ph test indicates the need to add an interaction of Time with Ethnic Fractionalization, Urbanization, and Slavery. When we add them, none of the interactions are significant, nor do the patterns of significance change in the model when adding them, so we don't include these insignificant terms in the model.
#dat$ef_it<-dat$ef_i*log(dat$stop + 1)
#dat$urb_it<-dat$urb_i*log(dat$stop + 1)
#dat$sla_it<-dat$sla_i*log(dat$stop + 1)
#model2t<-coxph(Surv(start, stop, committee)~  ef_i+ bre+ ef_i_bre +gini_i+manu_i+urb_i+sla_i+sla_it+ urb_it +ef_it+cluster(fips),data=dat,method="efron");summary(model2t);

model2<-coxph(Surv(start, stop, committee)~  ef_i+ bre+ ef_i_bre +gini_i+manu_i+urb_i+sla_i+cluster(fips),data=dat,method="efron");summary(model2);cox.zph(model2)
AIC(model2);BIC(model2);logLik(model2)

#Effect of GINI
z<-(exp(model2$coef[4])-1)*100;z
#Effect of Slavery
z<-(exp(model2$coef[7])-1)*100;z

#####
# Code to reproduce Figure 2, Appendix
#####

dat2<-data.frame(ef_i=c(quantile(dat$ef_i,probs=c(.25),na.rm=TRUE),quantile(dat$ef_i,probs=c(.75),na.rm=TRUE)),bre=c(0,1),ef_i_bre=c((quantile(dat$ef_i,probs=c(.25),na.rm=TRUE)*0),(quantile(dat$ef_i,probs=c(.75),na.rm=TRUE)*1)),bret=c(0,1))

lightgray<-"#d9d9d9";darkgray<-"#525252";gray<-"#666666"
col_seq<-c(as.vector(c(gray,darkgray,darkgray)),as.vector(c("black",lightgray,lightgray)))
line_seq<-c(as.vector(c(2,1,1)),as.vector(c(2,1,1)))

fit.mod <-summary(survfit(model1t),newdata=dat2)

plot(survfit(model1t,newdata=dat2),ylim=c(0.60,1) ,conf.int=T,,lty=c(as.vector(c(1,2,2)),as.vector(c(1,2,2))),lwd=line_seq,col= col_seq,ylab="Proportion of Counties With No Committee",xlab="Years till Committee Formation",xlim=c(0,11))
legend("bottomleft",c("No Social Frontier Conditions","95%C.I.","" ,"Social Frontier Conditions","95%CI"),lty=c(1,2,0,1,2),col=c(gray,darkgray,"","black",lightgray),lwd=c(3,3),border=lightgray,cex = .85,bty="o",box.col=lightgray)


#####
# Code to reproduce Model 1 Table 4
#####


model14<-coxph(Surv(start, stop, committee)~  ef+ bre+ ef_bre +cluster(fips),data=dat1,method="efron");summary(model14);cox.zph(model14)
AIC(model14);BIC(model14);logLik(model14)

#####
# Code to reproduce Model 2 Table 4
#####

model24<-coxph(Surv(start, stop, committee)~  ef+ bre+ ef_bre +gini+manu+urb+sla+cluster(fips),data=dat1,method="efron");summary(model24);cox.zph(model24)
AIC(model24);BIC(model24);logLik(model24)

#####
# Code to reproduce Model 2 Table 4
#####

lightgray<-"#d9d9d9";darkgray<-"#525252";gray<-"#666666"
col_seq<-c(as.vector(c(gray,darkgray,darkgray)),as.vector(c("black",lightgray,lightgray)))
line_seq<-c(as.vector(c(2,1,1)),as.vector(c(2,1,1)))


dat2m<-data.frame(ef=c(quantile(dat1$ef,probs=c(.25),na.rm=TRUE),quantile(dat1$ef,probs=c(.75),na.rm=TRUE)),bre=c(0,1),ef_bre=c((quantile(dat1$ef,probs=c(.25),na.rm=TRUE)*0),(quantile(dat1$ef,probs=c(.75),na.rm=TRUE)*1)))

plot(survfit(model14,newdata=dat2m),ylim=c(0.85,1),conf.int=T,lty=c(as.vector(c(1,2,2)),as.vector(c(1,2,2))),lwd=line_seq,col= col_seq,ylab="Proportion of Counties With No Committee",xlab="Years till Committee Formation",xlim=c(0,11))
legend("bottomleft",c("No Social Frontier Conditions","95%C.I.","" ,"Social Frontier Conditions","95%CI"),lty=c(1,2,0,1,2),col=c(gray,darkgray,"","black",lightgray),lwd=c(3,3),border=lightgray,cex = .85,bty="o",box.col=lightgray)


